perm filename A09.TEX[162,RWF] blob sn#824087 filedate 1986-09-03 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00002 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00002 00002	\magnification\magstephalf
C00015 ENDMK
C⊗;
\magnification\magstephalf
\input macro.tex
\def\today{\ifcase\month\or
  January\or February\or March\or April\or May\or June\or
  July\or August\or September\or October\or November\or December\fi
  \space\number\day, \number\year}
\baselineskip 14pt
\parskip 6pt
\rm
\line{\sevenrm a09.tex[162,phy] \today\hfill}

\parskip 5pt
\bigskip
\line{\bf Analysis of Quicksort and Its Relatives\hfill}

Let $x=\sum↓{m≤a≤b≤n}{1\over b-a+c}$.

\smallskip
\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
 \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
%\setbox1=\vbox{\hsize 28pc \noindent \strut
\setbox1=\vbox{\noindent \strut
{\bf Heuristic:} Summing a messy summand over a multidimensional region,
introduce a new variable that completely determines the value of the
summand; here, $d=b-a+c$.
\strut}
\boxit{\box1}

\smallskip
\noindent
Eliminate one of the original variables; $b=a+d-c$. Then
$$x=\sum↓{m≤a≤a+d-c≤n}{1\over d}\,.$$

\smallskip
\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
 \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
\setbox2=\vbox{\noindent\strut
Next, Express the range of summation as
$$\sum↓{d\in S}\,\sum↓{a\in T↓d}f(d)=\sum↓{d\in S}f(d)\sum↓{a\in T↓d}
1=\sum↓{d\in S}f(d)\cdot |T↓d|\,.$$
\strut}
\boxit{\box2}

\noindent
Find $S$ by eliminating $a$ using the transitive law in all possible
ways; 
$$\eqalign{m≤a≤n-d+c&⊃d≤n+c-m\cr
a≤a+d-c&⊃c≤d\cr}$$
so $d\in S$ is expressed by $c≤d≤n+c-m$. For given~$d$, the constraints
on~$a$ are
$m≤a≤n-d+c$, expressing $a\in T↓d$.
Therefore,
$$\eqalignno{x&=\sum↓{c≤d≤n+c-m}\,\sum↓{m≤a≤n-d+c}{1\over d}\cr
\noalign{\smallskip}
&=\sum↓{c≤d≤n+c-m}{n-d+c-m+1\over d}\cr
\noalign{\smallskip}
&=(n+c-m+1)\sum↓{c≤d≤n+c-m}{1\over d}-\sum↓{c≤d≤n+c-m}1\,,\cr}$$
so
$$\sum↓{m≤a≤b≤n}{1\over b-a+c}=(n+c-m+1)(H↓{n+c-m}-H↓{c-1})-(n\dminus m+1)\,,$$

Now consider simple Quicksort, $Q(1,n)$, where recursive calls $Q(a,b)$
may occur for $1≤a≤b≤n$. We compute the probability that $(a,b)$ will occur,
by cases:

\smallskip
\display 20pt:$\bullet$:
$(1<a≤b<n)$

\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
 \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
\setbox3=\vbox{\hsize 15pc\noindent\strut
{\bf Heuristic:} change $<$ to $≤$, and $>$ to $≥$
\strut}

\smallskip
\display 20pt::
\boxit{\box3}

\display 20pt:$\bullet$:
$(2≤a≤b≤n-1)$

\display 20pt::
The call $Q(a,b)$ occurs if both $a-1$ and $b+1$ are chosen as thresholds
before any intermediate keys; the probability is
${b-a+3\choose 2}↑{-1}={2\over (b-a+3)(b-a+2)}$.

\smallskip
\display 20pt:$\bullet$:
$(1=a≤b≤n-1)$

\display 20pt::
The call $Q(1,b)$ occurs if $b+1$ is chosen as threshold before any smaller~$k$,
with probability ${1\over b+1}$.

\smallskip
\display 20pt:$\bullet$:
$(2≤a≤b=n)$

\display 20pt::
Analogously, ${1\over n-a+2}$.

\smallskip
\display 20pt:$\bullet$:
$(1=a≤b=n)$

\display 20pt::
This call is inevitable; the probability is 1.

\smallskip\noindent
For practice, we compute the expected number of calls. Let $P(a,b)$ be
the probability that $Q(a,b)$ occurs.
$$\sum↓{1≤a≤b≤n}P(a,b)=\sum↓{2≤a≤b≤n}{2\over (b-a+3)(b-a+2)}+
\sum↓{1≤b≤n-1}{1\over b+1}+\sum↓{2≤a≤n}{1\over n-a+2}+1\,.$$

\smallskip
\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
 \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
\setbox4=\vbox{\noindent \strut
{\bf Heuristic:} In summing a rational function like ${2\over (b-a+3)(b-a+2)}$
with a factorable denominator, use partial fractions to get a summand where
all denominators are linear. Here,
$${2\over (b-a+3)(b-a+2)}={2\over b-a+2}-{2\over b-a+3}$$
\strut}
\boxit{\box4}

\noindent
Then
$$\eqalign{\sum↓{1≤a≤b≤n}P(a,b)
&=2\sum↓{2≤a≤b≤n-1}{1\over b-a+2}-2\sum↓{2≤a≤b≤n-1}{1\over b-a+3}+
(H↓n-1)+(H↓n-1)+1\cr
\noalign{\smallskip}
&=2(n-1+2-2+1)(H↓{n-1+2-2}-H↓{2-1})\cr
\noalign{\smallskip}
&\qquad -2(n-1+3-2+1)(H↓{n-1+3-2}-H↓{3-1})+2H↓n-1\cr
\noalign{\smallskip}
&=2(H↓{n-1}-1)-2(n+1)\left(H↓n-{3\over 2}\,\right)+2H↓n-1\cr
\noalign{\smallskip}
&=2n\left(H↓n-{1\over n}-1\right)-2(n+1)\left(H↓n-{3\over 2}\,\right)+2H↓n-1\cr
\noalign{\smallskip}
&=-2-2n+3(n+1)-1=n\,.\cr}$$
This is confirmed, because each key is the threshold for exactly one call.

Now for a harder task:
how many comparisons are made? Two approaches are possible. In the
first, we sum probabilities over the set of possible comparisons. Let
$R(a,b)$ be the probability that $a$~is compared to~$b$, which is the
probability that one of $a$,~$b$ will be chosen as a threshold before any
intermediate key, or 
${2\over b-a+1}$.
$$\eqalign{\sum↓{1≤a<b≤n}{2\over b-a+1}&=\sum↓{1≤a≤b-1≤n-1}\cr
\noalign{\smallskip}
&=2\sum↓{1≤a≤b'≤n-1}{1\over b'-a+2}\cr
\noalign{\smallskip}
&=2\bigl((n-1+2-1+1)(H↓{n-1+2-1}-H↓{2-1})-(n-1-1+1)\bigr)\cr
\noalign{\smallskip}
&=2\bigl((n+1)(H↓n-1)-(n-1)\bigr)\cr
\noalign{\smallskip}
&=2(n+1)H↓n-4n=2n\ln n-O(n)\,.\cr}$$
A second approach is to sum the number of comparisons made in each call
on~$Q$, times the probability of that call. In call $Q(a,b)$, there are
$b-a$ comparisons. The total number of comparisons is then


\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
 \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
\setbox5=\vbox{\hsize 10pc \noindent \strut
Replace $H↓{n-1}$ by $H↓n-{1\over n}$
\strut}
$$\eqalign{2&\sum↓{2≤a≤b≤n-1}(b-a)\left({1\over b-a+2}-{1\over b-a+3}\right)
+\sum↓{1≤b≤n-1}{(b-1)\over b+1}\cr
\noalign{\smallskip}
&\qquad +\hbox{an equivalent symmetric term}+(n-1)\cr
\noalign{\smallskip}
&=6\sum↓{2≤a≤b≤n-1}{1\over b-a+3}\cr
\noalign{\smallskip}
&\qquad -4\sum↓{2≤a≤b≤n-1}{1\over b-a+2}+2
\left(n-1-2\sum↓{1≤b≤n-1}{1\over b+1}\,\right)+n-1\cr
\noalign{\smallskip}
&=6\bigl((n-1+3-2+1)(H↓{n-1+3-2}-H↓{3-1})-(n-1-2+1)\bigr)\cr
\noalign{\smallskip}
&\qquad -4\bigl((n-1+2-2+1)(H↓{n-1+2-1}-H↓{2-1})-(n-1-2+1)\bigr)\cr
\noalign{\smallskip}
&\qquad +2n-4(4n-H↓1)+(n-1)\cr
\noalign{\smallskip}
&=6\left((n+1)\left(H↓n-{3\over 2}\,\right)-(n-2)\right)\cr
\noalign{\smallskip}
&\qquad -4\bigl(n(H↓{n-1}-1)-(n-2)\bigr)
+3n-1-4(H↓n-1)\cr
\noalign{\smallskip}
&\boxit{\box5}\cr
\noalign{\smallskip}
&=H↓n\bigl(6(n+1)-4n-4\bigr)+n(-9-6+4+4+3)+(-9+12+4-8-1+4)\cr
\noalign{\smallskip}
&=2(n+1)H↓n-4n+2\cr
\noalign{\smallskip}
&=2n\ln n+O(n)\,.\cr}$$

Now we use a more sophisticated version of Quicksort, where calls
$Q(a,b)$ are only executed if the region to be sorted has size
$b-a+1≥s$; we assume $n≥s$. We find the expected number of comparisons
by summing $P(a,b)(b-a)$ for only $(b-a)+1≥s$.

\disleft 38pt::
{\bf Exercise:} Perform the summation.

To determine the expected number of remaining inversions, we determine
for each $(a,b)$ with $b-a≤s-2$ the probability that it occurs in
a partition (and is of course rejected for further partitioning). It
may occur with $a-1$ and $b+1$ being chosen in either order. We consider
the chance that $a-1$ and then $b+1$ are chosen as thresholds.
Then $a-1$ is chosen first, and $b+1$ second, out of the keys $a-1$
through $a+s-3$; if $a+s-3≤n$, this has probability ${1\over (s-1)(s-2)}$.
If $a+s-3>n$, or $a≥n-s+4$, it has probability ${1\over (n-a+2)(n-a+1)}$.
If it occurs, the expected number of inversions is 
$\sum↓{1≤a≤b≤n}S(a,b)\cdot\left.{b-a+1\choose 2}\right/2$
where $S(a,b)$ is the probability calculated above. Oops, make a special
case out of $a=1$ and/or $b=n$.

Now consider finding the key equal to $i$. No $Q(a,b)$ is executed unless
$a≤i≤b$. We can determine the probability that $Q(a,b)$ occurs, as before,
and perform the same summations over a diminished range. We can use the
relation
$$\sum↓{\scriptstyle 1≤a≤b≤n\atop\scriptstyle a≤i≤b}
f(a,b)=\sum↓{1≤a≤b≤n}f(a,b)-\sum↓{1≤a≤b≤i-1}f(a,b)-
\sum↓{i+1≤a≤b≤n}f(a,b)$$
to compute a correction term to the previous sums.

\smallskip
\disleft 38pt::
{\bf Exercise:} Evaluate the sums.

\smallskip
\disleft 38pt::
{\bf Exercise} (harder): Consider finding two keys $i$ and $j$, $i<j$.



\bigskip
\parindent0pt
\copyright 1986 Robert W. Floyd.
First draft (not published)
September 2, 1986.
%revised date;
%subsequently revised.

\bye